---
title: "-- Part1 -- BTW 2021 RTR-ML Paper"
output:
  html_document:
    df_print: paged
  word_document: default
  html_notebook: default
---

Load data subset and libraries

```{r}
library(randomForestSRC)
library(dplyr)
library(magrittr)
library(tree)
library(ggplot2)
library(ggalluvial)
```

```{r}
# Known project directories: the home PC first, then the laptop.
project_dirs <- c(
  "E:/Fortis/Workspace/ML DOM/BTW 2021",
  "C:/Users/fe300/Desktop/Workspace/ML DOM/BTW 2021"
)
# Switch to the first directory that exists on this machine. If none exists,
# stay in the current working directory instead of aborting inside setwd().
available_dirs <- project_dirs[dir.exists(project_dirs)]
if (length(available_dirs) > 0) {
  setwd(available_dirs[1])
}

# Strongly prefer fixed over scientific notation when printing numbers.
options(scipen = 999)

## load data
# dat.full.RData is expected to provide `dat.full`; dat.stream44.RData
# presumably provides `dat` (see the commented alternative below) -- confirm.
load("dat.full.RData")
load("dat.stream44.RData")

## activate dataset (choose which data)
RTS2021 <- dat.full
# RTS2021 <- dat
```

```{r}
## Dataset preparation:
# Turn missing answers in the pre-/post-debate survey block (columns 20:89)
# into the explicit category "no response", then store every column of that
# block as a factor. Columns without missing values are only converted.
survey_block <- 20:89
RTS2021[, survey_block] <- lapply(RTS2021[, survey_block], function(answers) {
  if (anyNA(answers)) {
    answers[is.na(answers)] <- "no response"
  }
  factor(answers)
})

# Drop respondents whose post-debate winner rating is "no response" and
# rebuild the factor so that the unused level disappears.
RTS2021 <- RTS2021[RTS2021$nach.sieger != "no response", ]
RTS2021$nach.sieger <- factor(RTS2021$nach.sieger)


```


# Section 2: Change of winning preference

## Predicting who changes their winning preference + determining the variables that have the greatest influence on this.



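The variables "victory.change.AB", "victory.change.AL" and "victory.change.OS" indicate this change of preference towards each candidate; "no.change" and "change.to.draw" mark stable ratings and switches to a draw.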

```{r, eval=TRUE}

#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
## Derive the 0/1 indicators describing how the expected debate winner
## changed between the pre-survey (vor.sieger) and post-survey (nach.sieger).

RTS2021b <- RTS2021

# Answer labels exactly as they are coded in vor.sieger / nach.sieger.
label_ab <- "Annalena Baerbock"
label_al <- "Armin Laschet"
label_os <- "Olaf Scholz"
label_draw <- "Unentschieden"

#' Flag respondents whose (pre, post) winner rating matches a transition.
#'
#' @param data Data frame with the columns `vor.sieger` and `nach.sieger`.
#' @param from Character vector of pre-debate answers.
#' @param to Post-debate answer(s); recycled to the length of `from`, so that
#'   `from[k]` is paired with `to[k]`.
#' @return Factor coded "1" for rows matching any (from[k], to[k]) pair and
#'   "0" otherwise. `%in%` never yields NA, so rows with a missing rating are
#'   coded "0".
flag_transition <- function(data, from, to) {
  to <- rep_len(to, length(from))
  hit <- rep(FALSE, nrow(data))
  for (k in seq_along(from)) {
    hit <- hit | (data$vor.sieger %in% from[k] & data$nach.sieger %in% to[k])
  }
  as.factor(as.numeric(hit))
}

# NOTE(review): a pre-debate "no response" counts as a switch only for
# change.to.draw, not for the three candidate indicators -- confirm that this
# asymmetry is intended.

## change to Baerbock (from Laschet, Scholz or draw)
RTS2021b$victory.change.AB <- flag_transition(
  RTS2021b,
  from = c(label_al, label_os, label_draw),
  to = label_ab
)

# quick check:
# RTS2021b.test <- select(RTS2021b, vor.sieger, nach.sieger, victory.change.AB)

## change to Laschet (from Baerbock, Scholz or draw)
RTS2021b$victory.change.AL <- flag_transition(
  RTS2021b,
  from = c(label_ab, label_os, label_draw),
  to = label_al
)

## change to Scholz (from Baerbock, Laschet or draw)
RTS2021b$victory.change.OS <- flag_transition(
  RTS2021b,
  from = c(label_ab, label_al, label_draw),
  to = label_os
)

## no change of preference (same answer before and after, including draw)
stable_answers <- c(label_os, label_ab, label_al, label_draw)
RTS2021b$no.change <- flag_transition(
  RTS2021b,
  from = stable_answers,
  to = stable_answers
)

## change to draw (from any candidate or from "no response")
RTS2021b$change.to.draw <- flag_transition(
  RTS2021b,
  from = c(label_os, label_ab, label_al, "no response"),
  to = label_draw
)
```


## English variable names

```{r}
# Rename the German survey variables to English names (new = old).
# Each column is listed exactly once.
# NOTE(review): "gred" in pre.*.gred (from *.glaub) looks like a typo for
# "cred"; the names are kept as they are -- confirm before renaming.
RTS2021c <- dplyr::rename(
  RTS2021b,
  pre.victor = vor.sieger,
  pre.pol.interest = vor.politikinteresse,
  pre.gender = vor.geschlecht,
  pre.age = vor.alter,
  pre.state = vor.land,

  pre.chancellor.al = vor.kanzler.al,
  pre.chancellor.ab = vor.kanzler.ab,
  pre.chancellor.os = vor.kanzler.os,

  pre.al.symp = vor.al.symp,
  pre.al.gred = vor.al.glaub,
  pre.al.comp = vor.al.komp,
  pre.al.lead = vor.al.fuehr,

  pre.ab.symp = vor.ab.symp,
  pre.ab.gred = vor.ab.glaub,
  pre.ab.comp = vor.ab.komp,
  pre.ab.lead = vor.ab.fuehr,

  pre.os.symp = vor.os.symp,
  pre.os.gred = vor.os.glaub,
  pre.os.comp = vor.os.komp,
  pre.os.lead = vor.os.fuehr,

  pre.party.id = vor.pid,
  pre.party.poll = vor.sonntag,
  post.victor = nach.sieger
)

# German -> English labels of the chancellor preference ranks, shared by the
# three candidate columns.
# NOTE(review): the "Ã¤" sequences look like a mis-encoded "ä". The keys are
# kept byte-for-byte because they have to match the values stored in the
# data -- confirm against levels(RTS2021c$pre.chancellor.al).
preference_labels <- c(
  "3. PrÃ¤ferenz" = "Third Preference",
  "2. PrÃ¤ferenz" = "Second Preference",
  "1. PrÃ¤ferenz" = "First Preference"
)
preference_cols <- c("pre.chancellor.al", "pre.chancellor.ab", "pre.chancellor.os")
for (preference_col in preference_cols) {
  RTS2021c[[preference_col]] <- dplyr::recode(
    RTS2021c[[preference_col]],
    !!!preference_labels
  )
}

# A draw is labelled "draw" in both winner ratings from here on.
RTS2021c$pre.victor <- dplyr::recode(RTS2021c$pre.victor, "Unentschieden" = "draw")
RTS2021c$post.victor <- dplyr::recode(RTS2021c$post.victor, "Unentschieden" = "draw")

# English age-group labels (same encoding caveat for "Ã¤lter" as above).
age_labels <- c(
  "18-21 Jahre" = "18-21 years",
  "22-29 Jahre" = "22-29 years",
  "30-39 Jahre" = "30-39 years",
  "40-49 Jahre" = "40-49 years",
  "50-59 Jahre" = "50-59 years",
  "60-69 Jahre" = "60-69 years",
  "70 und Ã¤lter" = "70 years and older"
)
RTS2021c$pre.age <- dplyr::recode(RTS2021c$pre.age, !!!age_labels)
```

```{r}
## Summary of the selected survey columns plus columns 383:387
## (presumably the derived change indicators -- confirm); up to 50 levels
## are listed per factor.
summary(
  object = RTS2021c[, c(20, 21, 24:39, 57:60, 383:387)],
  maxsum = 50
)
```

## ggplot2 alluvial diagram of preference change


```{r, eval=FALSE, fig.width=11, fig.height=8, dpi=300}
# Give every respondent a weight of one; it is used as the y aesthetic.
RTS2021c$count <- 1

ggplot(
  data = RTS2021c,
  mapping = aes(axis1 = pre.victor, axis2 = post.victor, y = count)
) +
  scale_x_discrete(
    limits = c("pre-victor", "post-victor"),
    expand = c(0.1, 0.1)
  ) +
  labs(x = "switch during the debate") +
  # Flows are coloured by the post-debate winner rating.
  geom_alluvium(aes(color = post.victor)) +
  geom_stratum() +
  scale_color_brewer(palette = "Accent") +
  geom_text(stat = "stratum", aes(label = after_stat(stratum)), nudge_x = 0) +
  # geom_text(aes(label = as.character(labels)),
  #           stat = "flow", size = 3, nudge_x = .04) +
  theme(legend.position = "none") +
  ggtitle(
    "Change of winner perception pre- and post-debate",
    "Switch from one candidate to another - color = post-debate alignment"
  )



```


### Over-sampling (tested as a tool to counter class imbalance, but not used in the end)

```{r, eval=FALSE}
## Experimental only -- do not use in regular runs.
# Append three extra copies of every respondent who switched to Scholz, so
# that each of those rows occurs four times in total.
# NOTE(review): this builds on RTS2021b and overwrites RTS2021c, so the
# English renames/recodes stored in RTS2021c are replaced -- confirm.
scholz_switchers <- dplyr::filter(RTS2021b, victory.change.OS == 1)
RTS2021c <- do.call(
  rbind,
  c(list(RTS2021b), rep(list(scholz_switchers), 3))
)
```

### Random forest learning


```{r, eval=TRUE}
## Baerbock (AB): three forests predicting victory.change.AB (column 383)
set.seed(100)

## (a) pre-debate survey data only
rf.2021.vchange.AB.a <- rfsrc(
  victory.change.AB ~ .,
  data = RTS2021c[, c(21, 24:39, 57:60, 383)],
  ntree = 750,
  forest = TRUE
)

## (b) RTS measurements (columns 90:382) plus column 35 (vor.sieger) only
rf.2021.vchange.AB.b <- rfsrc(
  victory.change.AB ~ .,
  data = RTS2021c[, c(35, 90:382, 383)],
  ntree = 1250,
  nsplit = 30,
  mtry = 250,
  forest = TRUE
)

## (c) both combined: survey data + RTS (no nach.kanzler included)
rf.2021.vchange.AB.c <- rfsrc(
  victory.change.AB ~ .,
  data = RTS2021c[, c(21, 24:39, 57:60, 90:382, 383)],
  ntree = 1250,
  nsplit = 30,
  mtry = 250,
  forest = TRUE
)

```


```{r}
## Laschet (AL): three forests predicting victory.change.AL (column 384)
set.seed(100)

## (a) pre-debate survey data only
rf.2021.vchange.AL.a <- rfsrc(
  victory.change.AL ~ .,
  data = RTS2021c[, c(21, 24:39, 57:60, 384)],
  ntree = 750,
  forest = TRUE
)

## (b) RTS measurements (columns 90:382) plus column 35 (vor.sieger) only
rf.2021.vchange.AL.b <- rfsrc(
  victory.change.AL ~ .,
  data = RTS2021c[, c(35, 90:382, 384)],
  ntree = 1250,
  nsplit = 30,
  mtry = 250,
  forest = TRUE
)

## (c) both combined: survey data + RTS (no nach.kanzler included)
rf.2021.vchange.AL.c <- rfsrc(
  victory.change.AL ~ .,
  data = RTS2021c[, c(21, 24:39, 57:60, 90:382, 384)],
  ntree = 1250,
  nsplit = 30,
  mtry = 250,
  forest = TRUE
)


```


```{r}
## Scholz (OS): three forests predicting victory.change.OS (column 385)
# Seed the RNG here as the Baerbock and Laschet chunks do, so this chunk gives
# the same forests whether it is run on its own or as part of the document.
# NOTE(review): without this seed the fits depended on the RNG state left by
# the preceding chunk, so numbers may differ from earlier knitted output.
set.seed(100)

## (a) pre-debate survey data only
rf.2021.vchange.OS.a <- rfsrc(
  victory.change.OS ~ .,
  data = RTS2021c[, c(21, 24:39, 57:60, 385)],
  ntree = 750,
  forest = TRUE
)

## (b) RTS measurements (columns 90:382) plus column 35 (vor.sieger) only
rf.2021.vchange.OS.b <- rfsrc(
  victory.change.OS ~ .,
  data = RTS2021c[, c(35, 90:382, 385)],
  ntree = 1250,
  nsplit = 30,
  mtry = 250,
  forest = TRUE
)

## (c) both combined: survey data + RTS (no nach.kanzler included)
rf.2021.vchange.OS.c <- rfsrc(
  victory.change.OS ~ .,
  data = RTS2021c[, c(21, 24:39, 57:60, 90:382, 385)],
  ntree = 1250,
  nsplit = 30,
  mtry = 250,
  forest = TRUE
)


```

```{r, eval=FALSE}
## Scholz forest on the 4x oversampled data
## Survey data + RTS combined (no nach.kanzler included).
# NOTE(review): this uses survey columns 20:42 rather than the 21, 24:39
# selection of the other forests -- confirm that this is intended.
rf.2021.vchange.OS.c2 <- rfsrc(
  victory.change.OS ~ .,
  data = RTS2021c[, c(20:42, 57:60, 90:382, 385)],
  ntree = 500,
  forest = TRUE
)

```

## show results

```{r, eval=TRUE}
## Results: survey-data forest, Baerbock
print(rf.2021.vchange.AB.a)

```

```{r}
## Results: survey-data forest, Laschet
print(rf.2021.vchange.AL.a)
```

```{r}
## Results: survey-data forest, Scholz
print(rf.2021.vchange.OS.a)
```


####




```{r, eval=TRUE}
## Results: RTS forest, Baerbock
print(rf.2021.vchange.AB.b)

```

```{r, eval=TRUE}
## Results: RTS forest, Laschet
print(rf.2021.vchange.AL.b)

```

```{r, eval=TRUE}
## Results: RTS forest, Scholz
print(rf.2021.vchange.OS.b)

```


### PLOT ROCs

```{r eval=TRUE, dpi=300}
## Results of the combined forest (survey data + RTS), Baerbock:
## print the fit, then draw its ROC curve.
print(rf.2021.vchange.AB.c)
# NOTE(review): newer ggRandomForests releases appear to name this argument
# `which_outcome` -- confirm against the installed version.
gg_roc_AB <- ggRandomForests::gg_roc(
  rf.2021.vchange.AB.c,
  which.outcome = 2
)
plot(gg_roc_AB)

```

```{r, dpi=300, eval=TRUE}
## Results of the combined forest (survey data + RTS), Laschet:
## print the fit, then draw its ROC curve.
print(rf.2021.vchange.AL.c)
# NOTE(review): newer ggRandomForests releases appear to name this argument
# `which_outcome` -- confirm against the installed version.
gg_roc_AL <- ggRandomForests::gg_roc(
  rf.2021.vchange.AL.c,
  which.outcome = 2
)
plot(gg_roc_AL)

```


```{r, eval=TRUE}
## Results of the combined forest (survey data + RTS), Scholz:
## print the fit, then draw its ROC curve.
print(rf.2021.vchange.OS.c)
# NOTE(review): newer ggRandomForests releases appear to name this argument
# `which_outcome` -- confirm against the installed version.
gg_roc_OS <- ggRandomForests::gg_roc(
  rf.2021.vchange.OS.c,
  which.outcome = 2
)
plot(gg_roc_OS)
```

## oversampling
```{r, eval=FALSE}
## Results: Scholz forest fitted on the oversampled data
print(rf.2021.vchange.OS.c2)
```


## not used

```{r, eval=TRUE}
# nur für externe daten
## prediction <- predict.rfsrc(rf.2021.kchange.a, Data[,c(1:12,14:108)])

# new dataframe (backup)
#rf.2021.kchange.c$xvar -> RTS2021.kchange
#cbind(RTS2021.kchange[,6], rf.2021.kchange.c$yvar, rf.2021.kchange.c$predicted, rf.2021.kchange.c$predicted.oob) -> RTS2021b.predictions


#colnames(RTS2021b.predictions) <- c("vor.Kanzler", "kanzler.change", "prediction", "prediction.OOB")

#head(RTS2021b.predictions, n=30)

```

## Variable importance measures (VIMP)

```{r, eval=TRUE}
## Variable importance of the combined Baerbock forest, all predictors.
# To restrict it to selected variables, pass xvar.names, e.g.
# vimp <- vimp(data, xvar.names = c("V1","V2","V3","V4","V5", "vor.kanzler"))
vimp.change.AB <- vimp(object = rf.2021.vchange.AB.c)

```

```{r}
## Variable importance of the combined Laschet forest, all predictors.
vimp.change.AL <- vimp(object = rf.2021.vchange.AL.c)
```

```{r}
## Variable importance of the combined Scholz forest, all predictors.
vimp.change.OS <- vimp(object = rf.2021.vchange.OS.c)
```

## plot pre.age effect

```{r, dpi=200}
# Effect of pre.age on the combined Baerbock forest, for target = 2.
plot.variable(
  rf.2021.vchange.AB.c,
  xvar.names = "pre.age",
  target = 2,
  notch = FALSE
)
# Same kind of plot for two RTS variables, in the original order.
for (rts_var in c("V288", "V204")) {
  plot.variable(rf.2021.vchange.AB.c, xvar.names = rts_var, target = 2)
}
```




```{r, eval=TRUE}
# Importance matrix of the Baerbock forest as a data frame.
vimp.change.extra <- as.data.frame(vimp.change.AB[["importance"]])

# Table sorted by the importance column "1", largest first.
# Alternative orderings: use the column `all` or `0` instead of `1`.
knitr::kable(
  vimp.change.extra[order(vimp.change.extra[["1"]], decreasing = TRUE), ]
)

# Second copy of the importance values, built with data.frame().
change.importance.data <- data.frame(vimp.change.AB$importance)

```

```{r}
# Importance matrix of the Laschet forest as a data frame.
vimp.change.extra <- as.data.frame(vimp.change.AL[["importance"]])

# Table sorted by the importance column "1", largest first.
# Alternative ordering: use the column `all` instead of `1`.
knitr::kable(
  vimp.change.extra[order(vimp.change.extra[["1"]], decreasing = TRUE), ]
)

# Second copy of the importance values, built with data.frame().
change.importance.data <- data.frame(vimp.change.AL$importance)

```

```{r}
# Importance matrix of the Scholz forest as a data frame.
vimp.change.extra <- as.data.frame(vimp.change.OS[["importance"]])

# Table sorted by the importance column "1", largest first.
# Alternative ordering: use the column `all` instead of `1`.
knitr::kable(
  vimp.change.extra[order(vimp.change.extra[["1"]], decreasing = TRUE), ]
)

# Second copy of the importance values, built with data.frame().
change.importance.data <- data.frame(vimp.change.OS$importance)

```



# The variables listed above were particularly important for predicting the change in winning preference.  


## Decision Tree Visualisierung

```{r, eval=TRUE}
# library(tree)
# kanzler.change.tree <- RTS2021b[,c(25:37,43:50,163:808)]
# na.omit(kanzler.change.tree) -> kanzler.change.tree

# shift.to.merkel = ifelse(kanzler.change.tree$kanzler.change>=0.5, "Yes", "No")
# shift.to.schulz = ifelse(kanzler.change.tree$kanzler.change<=(-0.5), "Yes", "No")


# kanzler.change.tree.b = data.frame(kanzler.change.tree, shift.to.merkel)
# kanzler.change.tree.c = data.frame(kanzler.change.tree, shift.to.schulz)

# as.factor(kanzler.change.tree.b$shift.to.merkel) -> kanzler.change.tree.b$shift.to.merkel

# as.factor(kanzler.change.tree.c$shift.to.schulz) -> kanzler.change.tree.c$shift.to.schulz


```

```{r}
# tree.shift = tree(shift.to.merkel ~., data=kanzler.change.tree.b[,c(1:666,668)])

# tree.shift2 = tree(shift.to.schulz ~., data=kanzler.change.tree.c[,c(1:666,668)])
```

```{r, eval=TRUE,dpi=300, fig.width=10}
# summary(tree.shift)
# summary(tree.shift2)

# plot(tree.shift)
# text(tree.shift, pretty = 0)

# plot(tree.shift2)
# text(tree.shift2, pretty = 0)

# plot(kanzler.change.tree2$V45, kanzler.change.tree2$V78,pch=19,col=as.numeric(kanzler.change.tree2$shift.to.merkel))
# partition.tree(tree2,label="Shift to Merkel",add=TRUE)

```

```{r, eval=TRUE, fig.height=8, fig.width=11, dpi=300}

library(rpart)
library(rattle)
library(rpart.plot)
library(RColorBrewer)

# Fix the RNG state once before fitting the six classification trees.
set.seed(100)

# Odd-numbered trees use survey data + RTS columns; even-numbered trees use
# column 35 + RTS columns only. The last index is the response column.

## Baerbock (AB)
mytree1 <- rpart(
  victory.change.AB ~ .,
  data = RTS2021c[, c(21, 24:39, 57:60, 90:382, 383)],
  method = "class",
  control = rpart.control(cp = 0.02)
)
mytree2 <- rpart(
  victory.change.AB ~ .,
  data = RTS2021c[, c(35, 90:382, 383)],
  method = "class",
  control = rpart.control(cp = 0.015)
)

## Laschet (AL)
mytree3 <- rpart(
  victory.change.AL ~ .,
  data = RTS2021c[, c(21, 24:39, 57:60, 90:382, 384)],
  method = "class",
  control = rpart.control(cp = 0.02)
)
mytree4 <- rpart(
  victory.change.AL ~ .,
  data = RTS2021c[, c(35, 90:382, 384)],
  method = "class",
  control = rpart.control(cp = 0.015)
)

## Scholz (OS)
mytree5 <- rpart(
  victory.change.OS ~ .,
  data = RTS2021c[, c(21, 24:39, 57:60, 90:382, 385)],
  method = "class",
  control = rpart.control(cp = 0.02)
)
# cp lowered to 0.01 for this tree because higher values gave no results.
mytree6 <- rpart(
  victory.change.OS ~ .,
  data = RTS2021c[, c(35, 90:382, 385)],
  method = "class",
  control = rpart.control(cp = 0.01)
)

####

# Plot the six trees; both trees of a candidate share title and subtitle.
fancyRpartPlot(
  mytree1,
  main = "Shift from pre-debate winner expectation 'other' to Baerbock",
  sub = "From Pre-RTS preference draw/OS/AL",
  caption = NULL
)
fancyRpartPlot(
  mytree2,
  main = "Shift from pre-debate winner expectation 'other' to Baerbock",
  sub = "From Pre-RTS preference draw/OS/AL",
  caption = NULL
)

fancyRpartPlot(
  mytree3,
  main = "Shift from pre-debate winner expectation 'other' to Laschet",
  sub = "From Pre-RTS preference draw/OS/AB",
  caption = NULL
)
fancyRpartPlot(
  mytree4,
  main = "Shift from pre-debate winner expectation 'other' to Laschet",
  sub = "From Pre-RTS preference draw/OS/AB",
  caption = NULL
)

fancyRpartPlot(
  mytree5,
  main = "Shift from pre-debate winner expectation 'other' to Scholz",
  sub = "From Pre-RTS preference draw/AL/AB",
  caption = NULL
)
fancyRpartPlot(
  mytree6,
  main = "Shift from pre-debate winner expectation 'other' to Scholz",
  sub = "From Pre-RTS preference draw/AL/AB",
  caption = NULL
)
#fancyRpartPlot(mytree4, main="Shift of chancellor preference to Schulz", sub="From Pre-RTS preference -Neither- or -Angela Merkel-", caption = NULL)
#fancyRpartPlot(mytree2, main="Shift of chancellor preference to Merkel", sub="From Pre-RTS preference -Neither- or -Martin Schulz- w/o dichotome change variable", caption = NULL)
#fancyRpartPlot(mytree3, main="Shift of chancellor preference to Schulz", sub="From Pre-RTS preference -Neither- or -Angela Merkel- w/o dichotome change variable", caption = NULL)

#fancyRpartPlot(mytree5, main="Shift of chancellor preference to Merkel (cp=0.01)", sub="From Pre-RTS preference -Neither- or -Martin Schulz-", caption = NULL)
#fancyRpartPlot(mytree6, main="Shift of chancellor preference to Schulz (cp=0.01)", sub="From Pre-RTS preference -Neither- or -Martin Schulz-", caption = NULL)

# prp(mytree2)
# prp(mytree3)

## Plotmo
# library(plotmo)

# plotmo(mytree2, # type = "prob", nresponse = "No", 
#       graphtype2 = "persp",       # type2 = "image"
#       pt.col = ifelse(kanzler.change.tree2$shift.to.merkel == "No", "red", "green"))


#library(treeheatr)

#janitor::clean_names(kanzler.change.tree2) -> kanzler.change.tree3

#kanzler.change.tree3[1:3500,c(13:107,109)] -> last.test
#na.omit(last.test) -> last.test


#test0001 <- heat_tree(last.test, 
#          task="classification",
#          target_lab="shift_to_merkel")



```

## Shift to Baerbock


*###+#+#+#+++#####++##+#+#+
## Shifts for appendix

```{r, eval=TRUE, dpi=300, fig.width=12}
# Cases on the Baerbock tree path: pre-debate rating Scholz or draw, and
# V204 above 1.5.
# %in% tests set membership; `==` against a length-2 vector would recycle it
# and compare row by row against alternating values. The draw category is
# matched as "draw" because pre.victor was recoded from "Unentschieden" when
# RTS2021c was built.
tree.cases.AB <- dplyr::filter(
  RTS2021c,
  pre.victor %in% c("Olaf Scholz", "draw") & V204 > 1.5
)

# Cases with V157 >= 0.5 and V288 >= 1.5, used for the Laschet illustration.
AL.viz <- dplyr::filter(RTS2021c, V157 >= 0.5 & V288 >= 1.5)

# First 25 of those cases, showing columns 1, 35, 384 and 386.
head(AL.viz[, c(1, 35, 384, 386)], 25)
                        
# & vor.ms!=c("-1","-2") & V30<1.5 & V630>1.5 & V330<(-0.5))

#tree.cases2 <- dplyr::filter(kanzler.change.tree.b, vor.kanzler!="Angela Merkel" & V289>1.5 & V361<0.5 & V96<(-0.5))


#tree.cases3 <- dplyr::filter(kanzler.change.tree.b, vor.kanzler!="Angela Merkel" & V289<1.5 & vor.am!=(-2) & vor.am!=(-1) & vor.am!=0 & V88<(-0.5))
                               

#library(lattice)
#heatmap(as.matrix(tree.cases[, 22:666]),Rowv = NA, main="Path 1 - 22 cases")
#heatmap(as.matrix(tree.cases2[, 22:666]),Rowv = NA, main="Path 2 - 7 cases")
#heatmap(as.matrix(tree.cases3[, 22:666]),Rowv = NA, main="Path 2 - 13 cases")
```

## Shift to Baerbock (Cases)

```{r, eval=FALSE}
# print(tree.cases.AB[,c(35,43,293, 383)])
#print(tree.cases2[, c(117,310,382,667:668)])
#print(tree.cases3[, c(14,109,310,382,667:668)])


```


# More figures



```{r, fig.height=6, fig.width=10, dpi=300}
library(ggplot2)

# Scatter plots of respondents, coloured by whether they switched to Baerbock.
# All plots read from RTS2021c and refer to columns by bare name inside aes()
# (no `data$column`), and use ggplot() + geom_point() instead of the
# deprecated qplot().
# NOTE(review): alpha sits inside aes(), so ggplot2 treats the constant as
# data (adding an alpha legend) rather than as a fixed transparency. It is
# left unchanged to keep the figures as they were -- confirm.
title_ab <- "Participants changing victory perception to Annalena Baerbock"
legend_ab <- "Participant switched to Baerbock\n after debate"

## Pre-victor / pre chancellor AB
qplot.AB0 <- ggplot(
  RTS2021c,
  aes(pre.chancellor.ab, pre.victor, colour = victory.change.AB, alpha = 0.8)
)
qplot.AB0 +
  geom_jitter() +
  ggtitle("Participants changing winner perception to Annalena Baerbock") +
  ylab("Pre-evaluation: Who will win the debate?") +
  xlab("Pre-evaluation: Annalena Baerbock is your ______ as next chancellor") +
  labs(colour = legend_ab)

## No1 - Paper (with jitter)
qplot.AB1 <- ggplot(
  RTS2021c,
  aes(pre.victor, V204, colour = victory.change.AB, alpha = 0.8)
)
qplot.AB1 +
  geom_jitter() +
  geom_hline(yintercept = 1.5) +
  geom_vline(xintercept = (-0.25)) +
  ggtitle(title_ab) +
  xlab("Pre-evaluation: Who will win the debate?") +
  ylab("speaking section V204") +
  labs(colour = legend_ab)

## V137 + V157 (with jitter)
qplot.AB6 <- ggplot(
  RTS2021c,
  aes(V137, V157, colour = victory.change.AB, alpha = 0.8)
)
qplot.AB6 +
  geom_jitter() +
  geom_hline(yintercept = 0.5) +
  geom_vline(xintercept = (-0.5)) +
  ggtitle(title_ab) +
  xlab("speaking section V137") +
  ylab("speaking section V157") +
  labs(colour = legend_ab)

## No1 without jitter
qplot.AB <- ggplot(
  RTS2021c,
  aes(pre.victor, V204, colour = victory.change.AB, alpha = 0.6)
) +
  geom_point()
qplot.AB +
  geom_hline(yintercept = 1.5) +
  geom_vline(xintercept = (-0.25)) +
  ggtitle(title_ab) +
  xlab("Pre-evaluation: Who will win the debate?") +
  ylab("speaking section V204") +
  labs(colour = legend_ab)

## No2
qplot.AB2 <- ggplot(
  RTS2021c,
  aes(V200, V208, colour = victory.change.AB, alpha = 0.8)
) +
  geom_point()
qplot.AB2 +
  geom_hline(yintercept = 0) +
  geom_vline(xintercept = 0) +
  ggtitle(title_ab) +
  xlab("speaking section V200") +
  ylab("speaking section V208") +
  labs(colour = legend_ab)

## No3 - Paper
qplot.AB3 <- ggplot(
  RTS2021c,
  aes(V204, V261, colour = victory.change.AB, alpha = 0.8)
) +
  geom_point()
qplot.AB3 +
  geom_hline(yintercept = 6) +
  geom_vline(xintercept = 1.5) +
  ggtitle(title_ab) +
  xlab("speaking section V204") +
  ylab("speaking section V261") +
  labs(colour = legend_ab)

## No4 - Paper
qplot.AB4 <- ggplot(
  RTS2021c,
  aes(V288, V137, colour = victory.change.AB, alpha = 0.8)
) +
  geom_point()
qplot.AB4 +
  geom_hline(yintercept = 0) +
  geom_vline(xintercept = 0) +
  ggtitle(title_ab) +
  xlab("speaking section V288") +
  ylab("speaking section V137") +
  labs(colour = legend_ab)

## No5 - Paper
qplot.AB5 <- ggplot(
  RTS2021c,
  aes(V207, V137, colour = victory.change.AB, alpha = 0.8)
) +
  geom_point()
qplot.AB5 +
  geom_hline(yintercept = 0.5) +
  geom_vline(xintercept = (-0.5)) +
  ggtitle(title_ab) +
  xlab("speaking section V207") +
  ylab("speaking section V137") +
  labs(colour = legend_ab)
```

```{r, fig.height=6, fig.width=10, dpi=300}
library(ggplot2)

# Scatter plots of respondents, coloured by whether they switched to Laschet.
# Columns are referenced by bare name inside aes(), and ggplot() +
# geom_point() replaces the deprecated qplot().
# NOTE(review): alpha inside aes() is treated as data by ggplot2 (adds an
# alpha legend); left unchanged to keep the figures as they were -- confirm.
title_al <- "Participants changing victory perception to Armin Laschet"
legend_al <- "Participant switched to Laschet\n after debate"

## No1: V153 vs V157
qplot.AL <- ggplot(
  RTS2021c,
  aes(V153, V157, colour = victory.change.AL, alpha = 0.8)
) +
  geom_point()
qplot.AL +
  geom_hline(yintercept = 2.5) +
  geom_vline(xintercept = 1.5) +
  ggtitle(title_al) +
  xlab("speaking section V153") +
  ylab("speaking section V157") +
  labs(colour = legend_al)

## No2: V288 vs V157
qplot.AL2 <- ggplot(
  RTS2021c,
  aes(V288, V157, colour = victory.change.AL, alpha = 0.8)
) +
  geom_point()
qplot.AL2 +
  geom_hline(yintercept = 1.5) +
  geom_vline(xintercept = 0.5) +
  ggtitle(title_al) +
  xlab("speaking section V288") +
  ylab("speaking section V157") +
  labs(colour = legend_al)

## Jittered version with swapped axes: V157 on x, V288 on y.
# The title, axis labels and legend describe what is actually plotted
# (Laschet, V157, V288).
qplot.AL2J <- ggplot(
  RTS2021c,
  aes(V157, V288, colour = victory.change.AL, alpha = 0.8)
)
qplot.AL2J +
  geom_jitter() +
  geom_hline(yintercept = 0.5) +
  geom_vline(xintercept = 1.5) +
  ggtitle(title_al) +
  xlab("speaking section V157") +
  ylab("speaking section V288") +
  labs(colour = legend_al)


```

```{r, fig.height=6, fig.width=10, dpi=300}
library(ggplot2)

# Scatter plots of respondents, coloured by whether they switched to Scholz.
# Columns are referenced by bare name inside aes(), and ggplot() +
# geom_point() replaces the deprecated qplot().
# NOTE(review): alpha inside aes() is treated as data by ggplot2 (adds an
# alpha legend); left unchanged to keep the figures as they were -- confirm.
title_os <- "Participants changing victory perception to Olaf Scholz"
legend_os <- "Participant switched to Scholz\n after debate"

## os1: V244 vs V17
qplot.OS <- ggplot(
  RTS2021c,
  aes(V244, V17, colour = victory.change.OS, alpha = 0.8)
) +
  geom_point()
qplot.OS +
  geom_hline(yintercept = 0.5) +
  geom_vline(xintercept = 1.5) +
  ggtitle(title_os) +
  xlab("speaking section V244") +
  ylab("speaking section V17") +
  labs(colour = legend_os)

## os2: V244 vs V200 (the y label names the plotted variable, V200)
qplot.OS2 <- ggplot(
  RTS2021c,
  aes(V244, V200, colour = victory.change.OS, alpha = 0.8)
) +
  geom_point()
qplot.OS2 +
  geom_hline(yintercept = 0.5) +
  geom_vline(xintercept = 1.5) +
  ggtitle(title_os) +
  xlab("speaking section V244") +
  ylab("speaking section V200") +
  labs(colour = legend_os)
```




---------------------------
#+#+#+#+#+++#+#+#+#+#+#+#+#
-### Code Experiments and further data tests ###--

###
```{r, dpi=300, fig.width=12, eval=FALSE}
# Legacy experiment (chunk is not evaluated).
# NOTE(review): kanzler.change.tree.c is only created in commented-out code
# in this file, so this chunk cannot run as it stands.

# Path 1: no pre-debate chancellor preference, vor.ms not -2/-1, V30 > 1.5.
tree.cases11 <- dplyr::filter(
  kanzler.change.tree.c,
  vor.kanzler == "Keinen von beiden" & vor.ms != (-2) & vor.ms != (-1) & V30 > 1.5
)

# Path 2: as path 1, but V30 < 1.5 and V630 > 1.5.
tree.cases12 <- dplyr::filter(
  kanzler.change.tree.c,
  vor.kanzler == "Keinen von beiden" & vor.ms != (-2) & vor.ms != (-1) & V30 < 1.5 & V630 > 1.5
)

# Path 3: as path 2, plus V330 < -0.5.
# The vor.ms exclusion uses %in%; `!=` against a length-2 vector would recycle
# it and compare row by row against alternating values.
tree.cases13 <- dplyr::filter(
  kanzler.change.tree.c,
  vor.kanzler == "Keinen von beiden" & !(vor.ms %in% c("-1", "-2")) & V30 < 1.5 & V630 > 1.5 & V330 < (-0.5)
)

# tree.cases2 <- dplyr::filter(tree.cases2, vor.problem!="Anderes" & vor.problem!="Gesundheit" & vor.problem!="Soziale Gerechtigkeit", vor.problem!="FlÃ¼chtlinge")

library(lattice)
# Heat maps over columns 22:666 for each path, without row reordering.
heatmap(as.matrix(tree.cases11[, 22:666]), Rowv = NA, main = "Path 1 - 121 cases")
heatmap(as.matrix(tree.cases12[, 22:666]), Rowv = NA, main = "Path 2 - 18 cases")
heatmap(as.matrix(tree.cases13[, 22:666]), Rowv = NA, main = "Path 3 - 2 cases")
```

```{r, eval=FALSE}
# Print the same five columns for the path 1 and path 2 cases.
for (path_cases in list(tree.cases11, tree.cases12)) {
  print(path_cases[, c(15, 51, 651, 667:668)])
}

```

###


# Darstellung der Konstellation V289 & V361 bei allen Fällen ohne Vorpräferenz Merkel

```{r, dpi=300, eval=FALSE}
library(ggplot2)
# Legacy plot (chunk is not evaluated): V289 against V361 for all cases whose
# pre-debate chancellor preference was not Merkel.

tree.cases3a <- dplyr::filter(
  kanzler.change.tree.b,
  vor.kanzler != "Angela Merkel"
)

qplot1 <- qplot(
  tree.cases3a$V289,
  tree.cases3a$V361,
  colour = tree.cases3a$shift.to.merkel,
  alpha = 0.8
)

qplot1 +
  geom_hline(yintercept = 0.5) +
  geom_vline(xintercept = 1.5) +
  ggtitle("Participants with chancellor pre-preference Schulz or neither") +
  xlab("speaking section V289") +
  ylab("speaking section V361") +
  labs(colour = "Participant switched to Merkel\n after debate")

 
```

Darstellung der Konstellation V30 & V630 bei allen Fällen ohne Vorpräferenz Schulz

```{r, dpi=300, eval=FALSE}
# Legacy plots (chunk is not evaluated).

# Cases without a pre-debate chancellor preference: vor.ms against V30.
tree.cases4a <- dplyr::filter(
  kanzler.change.tree.c,
  vor.kanzler == "Keinen von beiden"
)

qplot2 <- qplot(
  tree.cases4a$vor.ms,
  tree.cases4a$V30,
  colour = tree.cases4a$shift.to.schulz,
  alpha = 0.8
)

qplot2 +
  geom_hline(yintercept = 1.5) +
  geom_vline(xintercept = (-0.25)) +
  ggtitle("Participants with chancellor pre-preference neither") +
  xlab("Pre-evaluation Martin Schulz (vor.ms)") +
  ylab("speaking section V30") +
  labs(colour = "Participant switched to Schulz\n after debate")

# Cases with pre-debate preference Merkel: V30 against V630.
tree.cases22 <- dplyr::filter(
  kanzler.change.tree.c,
  vor.kanzler == "Angela Merkel"
)

qplot2 <- qplot(
  tree.cases22$V30,
  tree.cases22$V630,
  colour = tree.cases22$shift.to.schulz,
  alpha = 0.8
)

qplot2 +
  geom_hline(yintercept = 1.5) +
  geom_vline(xintercept = 1.5) +
  ggtitle("Participants with chancellor pre-preference Merkel") +
  xlab("speaking section V30") +
  ylab("speaking section V630") +
  labs(colour = "Participant switched to Schulz\n after debate")

# V78 & V21
#qplot2 <- qplot(kanzler.change.tree2$V78, kanzler.change.tree2$V21, colour = kanzler.change.tree2$shift.to.merkel)

#qplot2 + geom_hline(yintercept=7.5) + geom_vline(xintercept=20.5)

# V45 & V10
#qplot3 <- qplot(kanzler.change.tree2$V45, kanzler.change.tree2$V10, colour = kanzler.change.tree2$kanzler.change)

#qplot3

# V45 & V10 only shift to merkel
#qplot4 <- qplot(V45, V10, data=kanzler.change.tree2[kanzler.change.tree2$kanzler.change>=0.5,], colour = kanzler.change)

#qplot4 + geom_hline(yintercept=1.5) + geom_vline(xintercept=3.5)

# vor.kanzler & V78

#qplot5 <- qplot(vor.kanzler, V78, data=kanzler.change.tree2, colour = kanzler.change)

#qplot5 
```


## Resterampe
### Vergleich mit Vorhersage von nach.kanzler (noch nicht fertig)

```{r}
# vimp1 <- vimp(rf.test.2020.kanzler2)

```

```{r}

# vimp1[["importance"]] -> vimp1data

# as.data.frame(vimp1data) -> vimp1data

# top_n(vimp1data[,2:1], 10) # all
# top_n(vimp1data[,1:2], 10) # Angela Merkel
# top_n(vimp1data[,1:3], 10) # Keiner von beiden
```

### Mit Imputation arbeiten

## Arbeiten mit dem NA count um rauszubekommen wo viele NAs sind

```{r}
## df$na_count <- apply(is.na(df), 1, sum)
```
